/** * License Agreement for OpenSearchServer * * Copyright (C) 2015 Emmanuel Keller / Jaeksoft * * http://www.open-search-server.com * * This file is part of OpenSearchServer. * * OpenSearchServer is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * OpenSearchServer is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenSearchServer. * If not, see <http://www.gnu.org/licenses/>. **/ package com.jaeksoft.searchlib.parser; import java.io.IOException; import java.util.List; import java.util.Properties; import javax.activation.DataSource; import javax.mail.Address; import javax.mail.Session; import javax.mail.internet.MimeMessage; import org.apache.commons.lang3.StringUtils; import org.apache.commons.mail.util.MimeMessageParser; import com.jaeksoft.searchlib.SearchLibException; import com.jaeksoft.searchlib.analysis.LanguageEnum; import com.jaeksoft.searchlib.streamlimiter.StreamLimiter; public class EmlParser extends Parser { public static final String[] DEFAULT_MIMETYPES = { "message/rfc822" }; public static final String[] DEFAULT_EXTENSIONS = { "eml" }; private static ParserFieldEnum[] fl = { ParserFieldEnum.parser_name, ParserFieldEnum.email_display_from, ParserFieldEnum.email_display_to, ParserFieldEnum.email_display_cc, ParserFieldEnum.email_display_bcc, ParserFieldEnum.email_conversation_topic, ParserFieldEnum.subject, ParserFieldEnum.content, ParserFieldEnum.email_sent_date, ParserFieldEnum.email_received_date, ParserFieldEnum.email_attachment_name, ParserFieldEnum.email_attachment_type, ParserFieldEnum.email_attachment_content, ParserFieldEnum.htmlSource, ParserFieldEnum.lang }; public EmlParser() { super(fl); } private final static Properties JAVAMAIL_PROPS = new Properties(); static { JAVAMAIL_PROPS.put("mail.host", "localhost"); JAVAMAIL_PROPS.put("mail.transport.protocol", "smtp"); } @Override protected void parseContent(StreamLimiter streamLimiter, LanguageEnum lang) throws IOException, SearchLibException { Session session = Session.getDefaultInstance(JAVAMAIL_PROPS); try { MimeMessage mimeMessage = new MimeMessage(session, streamLimiter.getNewInputStream()); MimeMessageParser mimeMessageParser = new MimeMessageParser( mimeMessage).parse(); ParserResultItem result = getNewParserResultItem(); String from = mimeMessageParser.getFrom(); if (from != null) result.addField(ParserFieldEnum.email_display_from, from.toString()); for (Address address : mimeMessageParser.getTo()) result.addField(ParserFieldEnum.email_display_to, address.toString()); for (Address address : mimeMessageParser.getCc()) result.addField(ParserFieldEnum.email_display_cc, address.toString()); for (Address address : mimeMessageParser.getBcc()) result.addField(ParserFieldEnum.email_display_bcc, address.toString()); result.addField(ParserFieldEnum.subject, mimeMessageParser.getSubject()); result.addField(ParserFieldEnum.htmlSource, mimeMessageParser.getHtmlContent()); result.addField(ParserFieldEnum.content, mimeMessageParser.getPlainContent()); result.addField(ParserFieldEnum.email_sent_date, mimeMessage.getSentDate()); result.addField(ParserFieldEnum.email_received_date, mimeMessage.getReceivedDate()); for (DataSource dataSource : mimeMessageParser.getAttachmentList()) { result.addField(ParserFieldEnum.email_attachment_name, dataSource.getName()); result.addField(ParserFieldEnum.email_attachment_type, dataSource.getContentType()); if (parserSelector == null) continue; Parser attachParser = parserSelector.parseStream( getSourceDocument(), dataSource.getName(), dataSource.getContentType(), null, dataSource.getInputStream(), null, null, null); if (attachParser == null) continue; List<ParserResultItem> parserResults = attachParser .getParserResults(); if (parserResults != null) for (ParserResultItem parserResult : parserResults) result.addField( ParserFieldEnum.email_attachment_content, parserResult); } if (StringUtils.isEmpty(mimeMessageParser.getHtmlContent())) result.langDetection(10000, ParserFieldEnum.content); else result.langDetection(10000, ParserFieldEnum.htmlSource); } catch (Exception e) { throw new IOException(e); } } }